The function FnFMining wraps up all the helper functions in this notebook. It uses the IDs harvested in the last notebook and saved in the NetWork files to collect data about these IDs.

The function takes as input the Twitter file of each category (NatBibTwitter.csv etc.), opens for each library in this file the corresponding NetWork-[datestamp] file and returns

  • for each library

    • a CSV file with the friends' IDs and data (if the library follows other accounts; some libraries do not follow anyone)
    • a CSV file with the followers' IDs and data
  • a list of the files for each library category, saved as NatBibT_Files.txt etc.

  • an error message for user IDs that could not be accessed, usually because the accounts have been deleted. These IDs can be checked with a service such as http://tweeterid.com.

The Friends and Followers files contain a list of dictionaries (LoD) with the keys friends_description, friends_user_id, friends_location, friends_screen_name and followers_description, followers_user_id, followers_location, followers_screen_name, respectively.
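
For illustration, a single entry of such a Friends LoD could look like this (the values are hypothetical; only the keys are fixed by the code below):

{'friends_user_id': 12345678,
 'friends_screen_name': 'some_library',
 'friends_location': 'Berlin, Germany',
 'friends_description': 'Official account of an example library'}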


In [9]:
# Code from MTSW 2Ed.
# cf. https://github.com/ptwobrussell/Mining-the-Social-Web-2nd-Edition

import twitter

def oauth_login():
    # XXX: Go to http://twitter.com/apps/new to create an app and get values
    # for these credentials that you'll need to provide in place of these
    # empty string values that are defined as placeholders.
    # See https://dev.twitter.com/docs/auth/oauth for more information 
    # on Twitter's OAuth implementation.
    
    CONSUMER_KEY = ''
    CONSUMER_SECRET = ''
    OAUTH_TOKEN = ''
    OAUTH_TOKEN_SECRET = ''
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    
    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api

# Sample usage
twitter_api = oauth_login()
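
If you prefer not to paste the credentials into the notebook itself, a small variation of oauth_login can read them from environment variables instead. This is only a sketch; it assumes the four values are exported under the (hypothetical) variable names used below.

import os
import twitter

def oauth_login_from_env():
    # Assumption: the four OAuth values are set as environment variables
    # with these names before the notebook is started.
    auth = twitter.oauth.OAuth(os.environ['OAUTH_TOKEN'],
                               os.environ['OAUTH_TOKEN_SECRET'],
                               os.environ['CONSUMER_KEY'],
                               os.environ['CONSUMER_SECRET'])
    return twitter.Twitter(auth=auth)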

In [10]:
#importing libraries
import json   #for pretty printing
import time   #for calculating Tweets per day
import operator #for sorting dictionaries
from collections import Counter #for turning lists to dictionaries etc.
from prettytable import PrettyTable   #for pretty printing in a table


# helper function prettyPrint taken from MTSW 2Ed.

def prettyPrint(Sp_1, Sp_2, counted_list_of_tuples):
    ptLang = PrettyTable(field_names=[Sp_1, Sp_2])
    [ptLang.add_row(kv) for kv in counted_list_of_tuples]
    ptLang.align[Sp_1], ptLang.align[Sp_2] = 'l', 'r'
    print ptLang
    
# helper functions: save the results as a csv-file

#import & export CSV
import csv

def impCSV(input_file):
    '''
    input_file = csv with keys: "URL", "Twitter"
    output = list of dictionaries
    '''
    f = open(input_file, 'r')
    d = csv.DictReader(f)
    LoD = []   # list of dictionaries
    for row in d:
        LoD.append(row)
    f.close()
    return LoD

def exp2CSV(listOfDict, filename):
    '''
    arguments = list of dictionaries, filename
    output = saves file to cwd (current working directory)
    '''
    outputfile = filename
    keyz = listOfDict[0].keys()
    f = open(outputfile,'w')
    dict_writer = csv.DictWriter(f,keyz)
    dict_writer.writer.writerow(keyz)
    dict_writer.writerows(listOfDict)
    f.close()
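
A quick round trip shows how the two CSV helpers fit together. This is just an illustration; the filename and the dictionary contents are made up.

# Hypothetical example: write a small list of dictionaries and read it back.
sample = [{'URL': 'http://example.org', 'Twitter': 'examplelib'}]
exp2CSV(sample, 'example.csv')      # saved to the current working directory
print impCSV('example.csv')         # [{'URL': 'http://example.org', 'Twitter': 'examplelib'}]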

In [11]:
# Both functions from MTSW 2 Ed.

import sys
from urllib2 import URLError
from httplib import BadStatusLine

def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw): 
    
    # A nested helper function that handles common HTTPErrors. Return an updated
    # value for wait_period if the problem is a 500 level error. Block until the
    # rate limit is reset if it's a rate limiting issue (429 error). Returns None
    # for 401 and 404 errors, which requires special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
    
        if wait_period > 3600: # Seconds
            print >> sys.stderr, 'Too many retries. Quitting.'
            raise e
    
        # See https://dev.twitter.com/docs/error-codes-responses for common codes
    
        if e.e.code == 401:
            print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
            return None
        elif e.e.code == 404:
            print >> sys.stderr, 'Encountered 404 Error (Not Found)'
            return None
        elif e.e.code == 429: 
            print >> sys.stderr, 'Encountered 429 Error (Rate Limit Exceeded)'
            if sleep_when_rate_limited:
                print >> sys.stderr, "Retrying in 15 minutes...ZzZ..."
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print >> sys.stderr, '...ZzZ...Awake now and trying again.'
                return 2
            else:
                raise e # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print >> sys.stderr, 'Encountered %i Error. Retrying in %i seconds' % \
                (e.e.code, wait_period)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function
    
    wait_period = 2 
    error_count = 0 

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError, e:
            error_count = 0 
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "URLError encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
        except BadStatusLine, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise

# See https://dev.twitter.com/docs/api/1.1/get/users/lookup for 
# twitter_api.users.lookup


def get_user_profile(twitter_api, screen_names=None, user_ids=None):
   
    # Must have either screen_name or user_id (logical xor)
    assert (screen_names != None) != (user_ids != None), \
    "Must have screen_names or user_ids, but not both"
    
    items_to_info = {}

    items = screen_names or user_ids
    
    while len(items) > 0:

        # Process 100 items at a time per the API specifications for /users/lookup.
        # See https://dev.twitter.com/docs/api/1.1/get/users/lookup for details.
        
        items_str = ','.join([str(item) for item in items[:100]])
        items = items[100:]

        if screen_names:
            response = make_twitter_request(twitter_api.users.lookup, 
                                            screen_name=items_str)
        else: # user_ids
            response = make_twitter_request(twitter_api.users.lookup, 
                                            user_id=items_str)
    
        for user_info in response:
            if screen_names:
                items_to_info[user_info['screen_name']] = user_info
            else: # user_ids
                items_to_info[user_info['id']] = user_info

    return items_to_info
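
A short usage sketch for get_user_profile (the screen name and the ID below are placeholders): it accepts either screen_names or user_ids, but not both, and returns a dictionary keyed by whichever identifier was passed in.

# Hypothetical calls:
# profiles = get_user_profile(twitter_api, screen_names=['some_library'])
# profiles = get_user_profile(twitter_api, user_ids=[12345678])
# profiles[12345678]['location'] would then hold that account's location field.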

In [18]:
def lookUpProfilesFriends(listOfIDs):
    '''
    input: list of IDs of friends
    output: list of dictionaries with the keys 'friends_user_id', 'friends_screen_name',
            'friends_location', 'friends_description'
    '''
    LoD = []
    errorIDs = []
    
    profiles = get_user_profile(twitter_api, user_ids=listOfIDs)
    for e in listOfIDs:
        try:
            infoDic = {}
            infoDic['friends_user_id'] = e
            infoDic['friends_screen_name'] = profiles[e]['screen_name']
            infoDic['friends_location'] = (profiles[e]['location']).encode('utf-8')
            infoDic['friends_description'] = (profiles[e]['description']).encode('utf-8')
            LoD.append(infoDic)
        except KeyError:
            # the ID was not returned by users/lookup, e.g. a deleted account
            errorIDs.append(e)
    if len(errorIDs) > 0:
        print
        print 'Error for these IDs:', errorIDs
        print
    return LoD


def lookUpProfilesFollowers(listOfIDs):
    '''
    input: list of IDs of followers
    output: list of dictionaries with the keys 'followers_user_id', 'followers_screen_name',
            'followers_location', 'followers_description'
    '''
    LoD = []
    errorIDs = []
    profiles = get_user_profile(twitter_api, user_ids=listOfIDs)
    
    for e in listOfIDs:
        try:
            infoDic = {}
            infoDic['followers_user_id'] = e
            infoDic['followers_screen_name'] = profiles[e]['screen_name']
            infoDic['followers_location'] = (profiles[e]['location']).encode('utf-8')
            infoDic['followers_description'] = (profiles[e]['description']).encode('utf-8')
            LoD.append(infoDic)
        except KeyError:
            # the ID was not returned by users/lookup, e.g. a deleted account
            errorIDs.append(e)
    if len(errorIDs) > 0:
        print 'Error for these IDs:', errorIDs
    
    return LoD


def wrapLookUp(dictOfFnFs):
    '''
    input: dict of FnFs of a library (with keys 'followers_ids', 'friends_ids' and 'screen_name' of the library)
    output: a list of filenames
    saves two files: <twitterhandle>_Friends_<datestamp>.csv and <twitterhandle>_Followers_<datestamp>.csv
    '''
    f1 = dictOfFnFs['friends_ids']
    f2 = dictOfFnFs['followers_ids']
    
    # the ID lists come back from the CSV as strings such as '[12, 34]';
    # convert them back into lists of ints (an empty list stays '[]')
    if type(f1) == str and f1 != '[]':
        f1 = [int(s) for s in f1.strip('[]').split(',')]
    if type(f2) == str and f2 != '[]':
        f2 = [int(s) for s in f2.strip('[]').split(',')]
    
    if len(f1) > 0 and type(f1) == list:
        friends = lookUpProfilesFriends(f1)
    else:
        friends = []
    if len(f2) > 0 and type(f2) == list:
        followers = lookUpProfilesFollowers(f2)
    else:
        followers = []
            
    #creating the filename of the csv with current datestamp 
    import datetime
    datestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    filename_friends = dictOfFnFs['screen_name'] + '_Friends_' + datestamp + '.csv'
    filename_followers = dictOfFnFs['screen_name'] + '_Followers_' + datestamp + '.csv'
    LoFilenames = []   # collects the filenames that were actually written
    
    #export as CSV to CWD
    if len(friends) > 0:
        exp2CSV(friends, filename_friends)
        LoFilenames.append(filename_friends)
    if len(followers) > 0:
        exp2CSV(followers, filename_followers)
        LoFilenames.append(filename_followers)
  
    return LoFilenames
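
The dictionary that wrapLookUp expects corresponds to one row of a *_NetWork_<datestamp>.csv file. A hypothetical example (the screen name and the IDs are invented) is shown below; note that the ID lists arrive as strings after the CSV round trip, which is why the function converts them back to lists of ints.

# Hypothetical input row for wrapLookUp:
row = {'screen_name': 'examplelib',
       'friends_ids': '[12345678, 23456789]',
       'followers_ids': '[34567890]'}
# wrapLookUp(row) would write examplelib_Friends_<datestamp>.csv and
# examplelib_Followers_<datestamp>.csv and return both filenames.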

In [19]:
def FnFMining(Twitterfile, datestamp):
    '''
    input: a Twitter file (NatBibTwitter.csv etc.) and the datestamp of the '_NetWork_<datestamp>.csv' files
    (the Twitter handle of each library is prepended to build the filename).
    '''
    import pickle                      # for saving the list to a file
    
    f = impCSV(Twitterfile)
    listOfFilenames = []
    for e in f:
        n = e['Twitter']                # get the Twitter handle of the library
        filename = n + '_NetWork_' + datestamp + '.csv' # create the filename for the library
        print filename
        b = impCSV(filename)            # import this file
        p = wrapLookUp(b[0])            # get description etc. for the FnFs of the library
        
        print p                        # print the filenames for each library
        listOfFilenames += p

    # for saving the list to a file    
    filename2 = Twitterfile[:-11] + '_Files.txt'   # create a filename like NatBibT_Files.txt
    print filename2
    with open(filename2, 'wb') as f:
        pickle.dump(listOfFilenames, f)
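
The *_Files.txt lists written by FnFMining can be read back later with pickle, for example (a sketch, assuming the file was written by a call like the ones below):

import pickle

with open('NatBibT_Files.txt', 'rb') as f:
    filenames = pickle.load(f)    # the list of Friends/Followers CSV filenames written above
print filenames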

Function Calls

National Libraries


In [20]:
FnFMining('NatBibTwitter2.csv', '2014-04-06')


bsb_muenchen_NetWork_2014-04-06.csv
['bsb_muenchen_Friends_2014-04-07.csv', 'bsb_muenchen_Followers_2014-04-07.csv']
dnb_aktuelles_NetWork_2014-04-06.csv
['dnb_aktuelles_Friends_2014-04-07.csv', 'dnb_aktuelles_Followers_2014-04-07.csv']
sbb_news_NetWork_2014-04-06.csv
Error for these IDs: [12456992]
['sbb_news_Followers_2014-04-07.csv']
NatBibT_Files.txt

University Libraries


In [7]:
FnFMining('UniBibTwitter2.csv', '2014-04-06')


ub_oldenburg_NetWork_2014-04-06.csv
['ub_oldenburg_Friends_2014-04-06.csv', 'ub_oldenburg_Followers_2014-04-06.csv']
hsubib_NetWork_2014-04-06.csv
['hsubib_Friends_2014-04-06.csv', 'hsubib_Followers_2014-04-06.csv']
ubhumboldtuni_NetWork_2014-04-06.csv
['ubhumboldtuni_Friends_2014-04-06.csv', 'ubhumboldtuni_Followers_2014-04-06.csv']
kitbibliothek_NetWork_2014-04-06.csv
['kitbibliothek_Friends_2014-04-06.csv', 'kitbibliothek_Followers_2014-04-06.csv']
kizuulm_NetWork_2014-04-06.csv
['kizuulm_Friends_2014-04-06.csv', 'kizuulm_Followers_2014-04-06.csv']
subugoe_NetWork_2014-04-06.csv
Error for these IDs: [50072429]
['subugoe_Friends_2014-04-06.csv', 'subugoe_Followers_2014-04-06.csv']
ubbochum_NetWork_2014-04-06.csv
['ubbochum_Friends_2014-04-06.csv', 'ubbochum_Followers_2014-04-06.csv']
slubdresden_NetWork_2014-04-06.csv
Error for these IDs: [74824233]
['slubdresden_Friends_2014-04-06.csv', 'slubdresden_Followers_2014-04-06.csv']
elibbremen_NetWork_2014-04-06.csv
['elibbremen_Friends_2014-04-06.csv', 'elibbremen_Followers_2014-04-06.csv']
stabihh_NetWork_2014-04-06.csv
['stabihh_Friends_2014-04-06.csv', 'stabihh_Followers_2014-04-06.csv']
Encountered 429 Error (Rate Limit Exceeded)
Retrying in 15 minutes...ZzZ...
ub_tu_berlin_NetWork_2014-04-06.csv
['ub_tu_berlin_Friends_2014-04-06.csv', 'ub_tu_berlin_Followers_2014-04-06.csv']
tubhh_NetWork_2014-04-06.csv
['tubhh_Friends_2014-04-06.csv', 'tubhh_Followers_2014-04-06.csv']
ulbbonn_NetWork_2014-04-06.csv
['ulbbonn_Friends_2014-04-06.csv', 'ulbbonn_Followers_2014-04-06.csv']
ubbayreuth_info_NetWork_2014-04-06.csv
['ubbayreuth_info_Friends_2014-04-06.csv', 'ubbayreuth_info_Followers_2014-04-06.csv']
ub_bi_NetWork_2014-04-06.csv
['ub_bi_Friends_2014-04-06.csv', 'ub_bi_Followers_2014-04-06.csv']
unibib_bs_NetWork_2014-04-06.csv
['unibib_bs_Friends_2014-04-06.csv', 'unibib_bs_Followers_2014-04-06.csv']
ub_wue_NetWork_2014-04-06.csv
['ub_wue_Friends_2014-04-06.csv', 'ub_wue_Followers_2014-04-06.csv']
unibib_NetWork_2014-04-06.csv
['unibib_Friends_2014-04-06.csv', 'unibib_Followers_2014-04-06.csv']
...ZzZ...Awake now and trying again.
URLError encountered. Continuing.
URLError encountered. Continuing.
URLError encountered. Continuing.
URLError encountered. Continuing.
URLError encountered. Continuing.
URLError encountered. Continuing.
URLError encountered. Continuing.
ubdue_NetWork_2014-04-06.csv
['ubdue_Friends_2014-04-06.csv', 'ubdue_Followers_2014-04-06.csv']
ub_fau_NetWork_2014-04-06.csv
['ub_fau_Friends_2014-04-06.csv', 'ub_fau_Followers_2014-04-06.csv']
tibub_NetWork_2014-04-06.csv
['tibub_Friends_2014-04-06.csv', 'tibub_Followers_2014-04-06.csv']
ubkassel_NetWork_2014-04-06.csv
['ubkassel_Friends_2014-04-06.csv', 'ubkassel_Followers_2014-04-06.csv']
ubleipzig_NetWork_2014-04-06.csv
Error for these IDs: [166163038]
['ubleipzig_Friends_2014-04-06.csv', 'ubleipzig_Followers_2014-04-06.csv']
ubmainz_NetWork_2014-04-06.csv
['ubmainz_Friends_2014-04-06.csv', 'ubmainz_Followers_2014-04-06.csv']
unibib_mr_NetWork_2014-04-06.csv
['unibib_mr_Friends_2014-04-06.csv', 'unibib_mr_Followers_2014-04-06.csv']
ubreg_NetWork_2014-04-06.csv
Error for these IDs: [334516554]
['ubreg_Friends_2014-04-06.csv', 'ubreg_Followers_2014-04-06.csv']
zbsport_NetWork_2014-04-06.csv
['zbsport_Friends_2014-04-06.csv', 'zbsport_Followers_2014-04-06.csv']
UniBibT_Files.txt

Public Libraries


In [8]:
FnFMining('OeBibTwitter2.csv', '2014-04-06')


stb_bielefeld_NetWork_2014-04-06.csv
Error for these IDs: [23788414]
['stb_bielefeld_Friends_2014-04-06.csv', 'stb_bielefeld_Followers_2014-04-06.csv']
stabi_bremen_NetWork_2014-04-06.csv
['stabi_bremen_Friends_2014-04-06.csv', 'stabi_bremen_Followers_2014-04-06.csv']
stbessen_NetWork_2014-04-06.csv
['stbessen_Friends_2014-04-06.csv', 'stbessen_Followers_2014-04-06.csv']
stbibkoeln_NetWork_2014-04-06.csv

Error for these IDs: [171132336]

['stbibkoeln_Friends_2014-04-06.csv', 'stbibkoeln_Followers_2014-04-06.csv']
stadtbueduedorf_NetWork_2014-04-06.csv
['stadtbueduedorf_Friends_2014-04-06.csv', 'stadtbueduedorf_Followers_2014-04-06.csv']
hoeb4u_NetWork_2014-04-06.csv
['hoeb4u_Friends_2014-04-06.csv', 'hoeb4u_Followers_2014-04-06.csv']
bibliothek_wit_NetWork_2014-04-06.csv
['bibliothek_wit_Friends_2014-04-06.csv', 'bibliothek_wit_Followers_2014-04-06.csv']
mediothek_NetWork_2014-04-06.csv
['mediothek_Friends_2014-04-06.csv', 'mediothek_Followers_2014-04-06.csv']
stabi_erlangen_NetWork_2014-04-06.csv
['stabi_erlangen_Friends_2014-04-06.csv', 'stabi_erlangen_Followers_2014-04-06.csv']
stabifr_NetWork_2014-04-06.csv
['stabifr_Friends_2014-04-06.csv', 'stabifr_Followers_2014-04-06.csv']
stabigoe_NetWork_2014-04-06.csv
['stabigoe_Friends_2014-04-06.csv', 'stabigoe_Followers_2014-04-06.csv']
stbneuss_NetWork_2014-04-06.csv
['stbneuss_Friends_2014-04-06.csv', 'stbneuss_Followers_2014-04-06.csv']
stbsalzgitter_NetWork_2014-04-06.csv
['stbsalzgitter_Friends_2014-04-06.csv', 'stbsalzgitter_Followers_2014-04-06.csv']
stabiso_NetWork_2014-04-06.csv
['stabiso_Friends_2014-04-06.csv', 'stabiso_Followers_2014-04-06.csv']
sbchemnitz_NetWork_2014-04-06.csv
['sbchemnitz_Friends_2014-04-06.csv', 'sbchemnitz_Followers_2014-04-06.csv']
stabiguetersloh_NetWork_2014-04-06.csv
['stabiguetersloh_Friends_2014-04-06.csv', 'stabiguetersloh_Followers_2014-04-06.csv']
stabi_mannheim_NetWork_2014-04-06.csv
['stabi_mannheim_Friends_2014-04-06.csv', 'stabi_mannheim_Followers_2014-04-06.csv']
stadtbibliothek_NetWork_2014-04-06.csv
['stadtbibliothek_Friends_2014-04-06.csv', 'stadtbibliothek_Followers_2014-04-06.csv']
stadtbibmg_NetWork_2014-04-06.csv
['stadtbibmg_Friends_2014-04-06.csv', 'stadtbibmg_Followers_2014-04-06.csv']
URLError encountered. Continuing.
URLError encountered. Continuing.
buecherei_ms_NetWork_2014-04-06.csv
['buecherei_ms_Friends_2014-04-06.csv', 'buecherei_ms_Followers_2014-04-06.csv']
stabuewuerzburg_NetWork_2014-04-06.csv
['stabuewuerzburg_Friends_2014-04-06.csv', 'stabuewuerzburg_Followers_2014-04-06.csv']
OeBibT_Files.txt
